library(tidyverse)
library(ggplot2)
library(plotly)
library(rpart)
# install.packages('transformr')
library(gganimate)
library('transformr')
# standard base R functionality
n <- 30
support_range <- 5
x <- support_range*runif(n=n)
x <- x-mean(x)
y <- x+runif(n=n)
# ALWAYS take the time to get this right for your audience!!
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#009E73", "#CC79A7","#D55E00", "#000000",
"#56B4E9", "#F0E442", "#0072B2", "#E69F00")
data_space_plot <- tibble(x=x,y=y) %>%
ggplot(aes(x=x,y=y,color='Data')) + geom_point() +
scale_colour_manual(values=cbbPalette)
data_space_plot + ggtitle('"Data Space"')
\[\LARGE \begin{align*} \sum_{i=1}^n (y_i - \hat y_i)^2 = {} & (\mathbf{y}-\mathbf{X} {\boldsymbol \beta})^T(\mathbf{y}-\mathbf{X} {\boldsymbol \beta})\\ = {} & \mathbf{y}^T\mathbf{y}-2{\boldsymbol \beta}^T \mathbf{X}^T\mathbf{y} + {\boldsymbol \beta}^T \mathbf{X}^T \mathbf{X} {\boldsymbol \beta}\\ \nabla_{\mathbf{\boldsymbol \beta}} \frac{1}{2} \left( \mathbf{y}^T\mathbf{y}-2{\boldsymbol \beta}^T \mathbf{X}^T\mathbf{y}+ {\boldsymbol \beta}^T \mathbf{X}^T \mathbf{X} {\boldsymbol \beta}\right) = {} & \frac{1}{2}(-2 \mathbf{X}^T\mathbf{y} + 2 \mathbf{X}^T \mathbf{X} {\boldsymbol \beta}) \\ = {} & \mathbf{X}^T \mathbf{X} {\boldsymbol \beta} - \mathbf{X}^T \mathbf{y}\\ = {} & \mathbf{X}^T \left(\mathbf{X} {\boldsymbol \beta} - \mathbf{y}\right) \end{align*}\]
Beta <- 0*Beta+2
alpha_learning_rate <- 0.01
step = 1
Betas_pars <- Beta %>% rename_at(step,
function(x) paste0(x, as.character(step), sep=''))
loss_pars <- c(sum((as.matrix(X)%*%as.matrix(Beta)-as.matrix(y))^2))
# https://en.wikipedia.org/wiki/Least_squares#Linear_least_squares
Beta <- Beta - alpha_learning_rate*t(as.matrix(X))%*%
(as.matrix(X)%*%as.matrix(Beta)-as.matrix(y))
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/append
loss_pars <- append(loss_pars,
sum((as.matrix(X)%*%as.matrix(Beta)-as.matrix(y))^2))
step = step+1
Betas_pars %>% add_column(Beta) %>%
rename_at(step,
function(x) paste0(x,as.character(step), sep='')) -> Betas_pars
# https://tibble.tidyverse.org/reference/add_row.html
Betas_pars %>% add_row() -> tmp
tmp[3,] < -loss_pars
## estimate1 estimate2 estimate3 estimate4 estimate5 estimate6 estimate7
## 3 NA NA NA NA NA NA NA
## estimate8 estimate9 estimate10
## 3 NA NA NA
tmp %>% knitr::kable(digits=2) %>%
kableExtra::kable_styling(full_width=FALSE)
| estimate1 | estimate2 | estimate3 | estimate4 | estimate5 | estimate6 | estimate7 | estimate8 | estimate9 | estimate10 |
|---|---|---|---|---|---|---|---|---|---|
| 2 | 1.55 | 1.23 | 1.01 | 0.86 | 0.75 | 0.67 | 0.62 | 0.58 | 0.56 |
| 2 | 1.51 | 1.26 | 1.13 | 1.07 | 1.03 | 1.02 | 1.01 | 1.00 | 1.00 |
| NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
# https://stackoverflow.com/questions/42158198/r-equivalent-of-pythons-np-dot-for-3d-array
# https://stackoverflow.com/questions/46843926/broadcasting-in-r/46845541#46845541
# https://stackoverflow.com/questions/37034623/simplest-way-to-repeat-matrix-along-3rd-dimension
two_var_cost_func <- function(x1, x2, y, par1.grid, par2.grid){
n <- length(y)
par1.x1 <- sweep(replicate(n, par1.grid), MARGIN=3, FUN='*', x1)
par2.x2 <- sweep(replicate(n, par2.grid), MARGIN=3, FUN='*', x2)
resid <- sweep(par1.x1+par2.x2, 3, y)
SSE <- (resid^2)
rowSums(SSE, dim=2)
}
range <- 500
Beta0_hat <- seq(-range,3*range,range/100)/range #seq(-33,33,length.out=300)#
Beta1_hat <- seq(-range,3*range,range/100)/range #seq(-100,100,length.out=300)#
Beta0_hat.grid <- outer(Beta0_hat, Beta1_hat, function(x1, x2) x1 )
Beta1_hat.grid <- outer(Beta0_hat, Beta1_hat, function(x1, x2) x2 )
SSE <- two_var_cost_func(x1=x*0+1, x2=x, y=y,
par1.grid=Beta0_hat.grid, par2.grid=Beta1_hat.grid)
# https://plotly.com/r/figure-labels/
# https://github.com/plotly/plotly.js/issues/608
plot_ly(x=Beta0_hat.grid, y=Beta1_hat.grid, z=SSE, type='surface') %>%
layout(title = '\n\nCost Function in\n"Model/Parameter Space"',
scene = list(xaxis=list(title="β₀"),
yaxis=list(title = "β"),
zaxis=list(title = "SSE"))) -> cost_function
# https://stackoverflow.com/questions/44619198/r-plot-ly-3d-graph-with-trace-line
# https://plotly.com/r/line-and-scatter/
cost_function %>%
add_trace(x = unlist(c(Betas_pars[1,])),
y = unlist(c(Betas_pars[2,])),
z = loss_pars,
type = "scatter3d",
mode = "lines+markers",
line = list(color = "red", width = 4)) -> parameter_space_plot
parameter_space_plot
(when run this live we can click through thumbnails of each of these figures)
set_names(1:10) %>% map(~ data_space_plot +
geom_abline(aes(color='Model',
intercept = Betas_pars[1,.x],
slope = Betas_pars[2,.x])) +
ggtitle('Viewing a point projection from "Model Space" into "Data Space"'))
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## $`8`
##
## $`9`
##
## $`10`
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#009E73", "#CC79A7","#D55E00", "#000000",
"#56B4E9", "#F0E442", "#0072B2", "#E69F00")
Beta <- 0*Beta-1
# https://www.r-graph-gallery.com/line-chart-several-groups-ggplot2.html
data_space_plot + geom_abline(aes(color='Model',
intercept = Beta[1,],
slope = Beta[2,])) +
geom_point(data=tibble(x=x, yhat=as.matrix(X)%*%as.matrix(Beta)),
mapping=aes(x=x, y=yhat, color='Predictions')) +
geom_line(data=tibble(x=rep(x,2),
group=x,
yhat=as.matrix(X)%*%as.matrix(Beta) %>%
as.tibble() %>%
bind_rows(tibble(estimate=y))),
mapping=aes(x=x, group=group, y=yhat$estimate,
color='Gradients')) +
ggtitle('Re-Imagining "Data Spece" as "Prediction Space"...
...so it\'s now a "Model" or "Parameter" space visualization')
\[\LARGE \begin{align*} \sum_{i=1}^n (y_i - \hat y_i)^2 = {} & (\mathbf{y}-\mathbf{\mathbf{\hat y}})^T(\mathbf{y}-\mathbf{\mathbf{\hat y}})\\ = {} & \mathbf{y}^T\mathbf{y}-2\mathbf{y}^T \mathbf{\hat y}+\mathbf{\hat y}^T\mathbf{\hat y}\\ \nabla_{\mathbf{\hat y}} \frac{1}{2} (\mathbf{\hat y}-\mathbf{\hat y})^T(\mathbf{\hat y}-\mathbf{\hat y}) = {} & \nabla_{\mathbf{\hat y}} \frac{1}{2}(\mathbf{\hat y}^T\mathbf{\hat y} -2\mathbf{\hat y}^T \mathbf{ y}+ \mathbf{ y}^T\mathbf{ y}) \\ = {} & \frac{1}{2}(2 \mathbf{\hat y} -2\mathbf{ y}) = \mathbf{\hat y} - \mathbf{ y} \end{align*}\]
alpha_learning_rate <- 0.5
step = 1
Betas = Beta %>% rename_at(step,
function(x) paste0(x, as.character(step), sep=''))
loss_pars <- c(sum((as.matrix(X)%*%as.matrix(Beta)-as.matrix(y))^2))
current_yhat <- as.matrix(X) %*% as.matrix(Beta)
new_yhat = current_yhat - alpha_learning_rate*(current_yhat-y)
Beta = lm(new_yhat~x) %>% broom::tidy() %>% select(estimate)
new_yhat = as.matrix(X) %*% as.matrix(Beta)
step = step+1
Betas %>% add_column(Beta) %>%
rename_at(step,
function(x) paste0(x, as.character(step), sep='')) -> Betas
loss_pars <- append(loss_pars,
sum((as.matrix(X)%*%as.matrix(Beta)-as.matrix(y))^2))
Betas %>% add_row() -> tmp
tmp[3,]<-loss_pars
tmp %>% knitr::kable(digits=2) %>%
kableExtra::kable_styling(full_width=FALSE)
| estimate1 | estimate2 | estimate3 | estimate4 | estimate5 | estimate6 | estimate7 | estimate8 | estimate9 | estimate10 |
|---|---|---|---|---|---|---|---|---|---|
| -1.00 | -0.25 | 0.12 | 0.31 | 0.40 | 0.45 | 0.47 | 0.48 | 0.49 | 0.49 |
| -1.00 | 0.00 | 0.50 | 0.75 | 0.87 | 0.94 | 0.97 | 0.98 | 0.99 | 0.99 |
| 265.15 | 67.58 | 18.19 | 5.84 | 2.75 | 1.98 | 1.79 | 1.74 | 1.72 | 1.72 |
set_names(1:10) %>% map(~ data_space_plot +
geom_abline(aes(color='Model',
intercept=Betas[1,.x],
slope=Betas[2,.x])) +
geom_point(data=tibble(
x=x,
yhat=as.matrix(X)%*%as.matrix(Betas[,.x])),
mapping=aes(x=x, y=yhat,
color='Predictions')) +
geom_line(data=tibble(
x=rep(x,2),
group=x,
yhat=as.matrix(X)%*%as.matrix(Betas[,.x]) %>%
as.tibble() %>%
bind_rows(tibble(V1=y))),
mapping=aes(x=x, group=group,
y=yhat$V1,
color='Gradients')) +
ggtitle('Path of Gradient Descent through "Prediction Space"'))
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## $`8`
##
## $`9`
##
## $`10`
parameter_space_plot %>%
add_trace(x = unlist(c(Betas[1,])),
y = unlist(c(Betas[2,])),
z = loss_pars,
type = "scatter3d",
mode = "lines+markers",
line = list(color = "red", width = 4))
\[ \LARGE \begin{align*} \mathbf{\hat y}_t = {}& \mathbf{\hat y}_{t-1}+\alpha (\mathbf{ y-\hat y}_{t-1})\\ \mathbf{\hat y}_t = {}& \mathbf{\hat y}_{t-1}+\alpha {\boldsymbol \epsilon}_{t-1}\\ \mathbf{\hat y}_t = {}& \mathbf{\hat y}_{t-1}+\alpha f({\boldsymbol \epsilon}_{t-1})\\ \mathbf{\hat y}_t = {}& \mathbf{\hat y}_{t-1}+\alpha \boldsymbol{\hat \epsilon}_{t-1} \end{align*} \] - \(\boldsymbol{\hat \epsilon}_{t-1}\) is a model - \(\mathbf{\hat y}_{t-1}\) is a model - \(\mathbf{\hat y}_{t} = \mathbf{\hat y}_{t-1}+\alpha \boldsymbol{\hat \epsilon}_{t-1}\) is another model - and \(\mathbf{\hat y}_{t}\) is closer to \(y\) than \(\mathbf{\hat y}_{t-1}\) was…
f0 <- function(x){
0*x
}
support <- seq(min(x), max(x), .01)
step=1
fhat_model <- list(f0(support))
fhat_datas <- f0(x)
data_space_plot + geom_line(data=tibble(x=support, y=fhat_model[[step]],
color='GBT'),
mapping=aes(x=x,y=y,color=color))
y_leftover <- y
# https://www.statmethods.net/advstats/cart.html
# https://www.rdocumentation.org/packages/rpart/versions/4.1-15/topics/predict.rpart
alpha_learning_rate=0.1
y_leftover <- y_leftover-fhat_datas
stump <- rpart(y~x, data=tibble(x=x, y=y_leftover),
control=rpart.control(maxdepth=2, minbucket=1, minsplit=2, cp=0))
fhat_datas <- alpha_learning_rate*predict(stump)
fhat_model <- append(fhat_model,
list(fhat_model[[step]] + alpha_learning_rate *
as.numeric(predict(stump, newdata=tibble(x=support)))))
step = step+1
data_space_plot + geom_line(data=tibble(x=support, y=fhat_model[[step]],
color='GBT'),
mapping=aes(x=x,y=y,color=color))
data_space_plot + geom_line(data=tibble(x=support, y=fhat_model[[step]],
color='GBT'),
mapping=aes(x=x,y=y,color=color))
# https://www.datanovia.com/en/blog/gganimate-how-to-create-plots-with-beautiful-animation-in-r/
# https://tidyr.tidyverse.org/reference/pivot_longer.html
animation <- fhat_model %>%
map(~ tibble(.x)) %>%
bind_cols() %>%
add_column(tibble(x=support)) %>%
pivot_longer(cols=starts_with(".x"))
# https://www.rdocumentation.org/packages/stringr/versions/1.4.0/topics/str_replace
animation %>% mutate(name=as.integer(str_replace(name,'.x...',''))) %>%
ggplot(aes(x=x, y=value)) + geom_line() +
transition_time(name) + labs(title = "Step: {frame_time}") +
geom_point(data=tibble(x=x,y=y), mapping=aes(x,y))